# -*- coding: utf-8 -*-
"""
Created on Thu Mar 19 10:25:30 2020

@author: Administrator
"""

import urllib
import urllib.request
from bs4 import BeautifulSoup
import re
import random
import time
import datetime

# Date stamp (YYYY-MM-DD) that is attached to every record scraped in this run.
now_time = datetime.datetime.now().strftime('%Y-%m-%d')

# Target URL: the site's home page.
url0 = "http://yaocai.zyctd.com/"

# Build the request with a desktop-browser User-Agent so the script does not
# identify itself with urllib's default agent string.
req0 = urllib.request.Request(url0)
req0.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36")

# Download the page body.  The context manager closes the HTTP response (and
# the underlying socket) as soon as the bytes have been read; the original
# left it open until garbage collection.
with urllib.request.urlopen(req0) as response0:
    html0 = response0.read()

# Parse the HTML into a BeautifulSoup tree kept in soup0.
# NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
# installed and warns about it; pin one (e.g. "html.parser") once it has been
# confirmed that the page parses identically with it.
soup0 = BeautifulSoup(html0)
# Dumps the whole parsed page to stdout (debug output kept from the original).
print(soup0)

# Collect the link targets and link texts found in two <div> panels of the
# home page.  list1 holds the href of each link and list2 the text of the same
# link, so list1[k] and list2[k] always describe the same anchor.
list1 = []
list2 = []

# (class attribute of the panel, index one past the last anchor to take).
# Anchor 0 of each panel is skipped, exactly as the original ranges
# range(1, 22) and range(1, 15) did.
for panel_class, end_index in (("shape l", 22), ("shape l ml40", 15)):
    # Look the panel up once instead of once per link.  Slicing (rather than
    # indexing 1..end_index-1) keeps working if a panel has fewer anchors.
    panel_anchors = soup0.find("div", class_=panel_class).findAll("a")[1:end_index]
    for anchor in panel_anchors:
        list1.append(anchor['href'])
        # get_text() yields exactly one string per anchor.  The original
        # list2.extend(tag) added one entry per child node, which shifts
        # list2 out of step with list1 whenever an anchor has zero or
        # several children.
        list2.append(anchor.get_text())

# Visit every URL in list1 and turn each row of the page's "tableBase" table
# into one record appended to list3.  A record is
#   [list2[j], field 1, field 2, text after the "￥" sign, first text found
#    inside the cell that carries a class attribute, field 5, now_time]
# where the fields are the capture groups of row_pattern below.
list3 = []

# Compiled once instead of on every row; the patterns are unchanged.
row_pattern = re.compile(r'<td>(.*?)</td><td>(.*?)</td><td>￥(.*?)</td><td class=".*?(.*?)/td><td>(.*?)</td></tr>')
inner_text_pattern = re.compile(r'>(.*?)<')

for j, url in enumerate(list1):
    # Random pause before each request.  The original computed this value but
    # never slept on it, so the pages were fetched back to back.
    stop = random.uniform(1, 3)
    time.sleep(stop)

    req = urllib.request.Request(url)
    req.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36")
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(req) as response:
        html = response.read()
    soup = BeautifulSoup(html)

    price_table = soup.find('table', class_="tableBase")
    if price_table is None:
        # One page without the table should not abort the whole run and lose
        # the records already collected.
        print("no 'tableBase' table found, skipping:", url)
        continue
    contents = price_table.findAll("tr")

    # Row 0 is skipped, as in the original range(1, l).
    for row in contents[1:]:
        # Parse the current row.  The original always parsed contents[2], so
        # every record of a page was a copy of the same row.
        content1 = str(row)
        user = row_pattern.search(content1)
        if user is None:
            # Row does not have the expected cell layout.
            continue
        # Group 4 still contains markup; keep only the first piece of text
        # that sits between a '>' and the following '<'.
        user2 = inner_text_pattern.search(str(user.group(4)))
        if user2 is None:
            continue

        list5 = [
            list2[j],
            user.group(1),
            user.group(2),
            user.group(3),
            user2.group(1),
            user.group(5),
            now_time,
        ]
        list3.append(list5)
    
import openpyxl

# Append the scraped records to the existing workbook and save it in place.
workbook_path = 'E:/数据/药材网/每日药价表.xlsx'
data = openpyxl.load_workbook(workbook_path)

# Debug output: the workbook's defined names and the names of all sheets.
# The attribute forms replace the deprecated get_named_ranges() /
# get_sheet_names() accessors; the defined-names line may print in a
# different format than before.
print(data.defined_names)
sheetnames = data.sheetnames
print(sheetnames)

# Write to the active sheet.  The original first looked up the first sheet by
# name and then immediately overwrote that with data.active, so the active
# sheet is what was actually used.
table = data.active
print(table.title)  # name of the sheet being written to

nrows = table.max_row        # last row that currently holds data
ncolumns = table.max_column  # column count (not used below)

# Record k (0-based) goes to row nrows + k + 1, i.e. directly below the
# existing data; the fields of a record fill columns 1, 2, 3, ...
values = list3
for row_offset, record in enumerate(values, start=1):
    for column_index, field in enumerate(record, start=1):
        table.cell(row=nrows + row_offset, column=column_index).value = field

data.save(workbook_path)

        
        
        
   
